From the directory:
O:\FWWR1708\Working\Task 4 - Field\Isotope work\writing\Paper 2. Bruce lead\data\Bruce precip analysis
The data file for measured data is “precip.iso.data_BD.csv”.
For the measured data file, some characterisations and summaries follow.
ddata <- readr::read_csv("data/precip.iso.data_BD.csv")
ddata.orig <- ddata
slice_head(ddata.orig, n = 5)
| site.number | sample.id | year | month | variable | val | agent.number |
|---|---|---|---|---|---|---|
| 1 | 70801 | 2007 | 8 | d18O | -4.41 | 20661 |
| 1 | 70901 | 2007 | 9 | d18O | -5.71 | 20661 |
| 1 | 71001 | 2007 | 10 | d18O | -2.74 | 20661 |
| 1 | 71101 | 2007 | 11 | d18O | -4.72 | 20661 |
| 1 | 71201 | 2007 | 12 | d18O | -2.54 | 20661 |
The variable column contains six “variables”: d18O, dD, d.excess, lat, long.
ddata |> count(variable)
| variable | n |
|---|---|
| d.excess | 1466 |
| d18O | 1466 |
| dD | 1466 |
| lat | 4398 |
| long | 4398 |
The day of the month for the measurement isn’t given, so the measurement is located in the middle of the month.
ddata <- filter(ddata.orig, variable == "d18O") |>
rename(d18O = val, Site = site.number) |>
mutate(Date = lubridate::make_date(year = year, month = month, day = 15))
d2H.
ddata2 <- filter(ddata.orig, variable == "dD") |>
rename(dD = val, Site = site.number) |>
mutate(Date = lubridate::make_date(year = year, month = month, day = 15))
skimr::skim(ddata)
| Name | ddata |
| Number of rows | 1466 |
| Number of columns | 8 |
| _______________________ | |
| Column type frequency: | |
| character | 2 |
| Date | 1 |
| numeric | 5 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| sample.id | 0 | 1 | 5 | 5 | 0 | 1465 | 0 |
| variable | 0 | 1 | 4 | 4 | 0 | 1 | 0 |
Variable type: Date
| skim_variable | n_missing | complete_rate | min | max | median | n_unique |
|---|---|---|---|---|---|---|
| Date | 0 | 1 | 2007-08-15 | 2010-06-15 | 2008-11-15 | 35 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| Site | 0 | 1.00 | 29.34 | 16.26 | 1.00 | 16.00 | 29.00 | 43.00 | 58.00 | ▆▇▇▆▇ |
| year | 0 | 1.00 | 2008.33 | 0.82 | 2007.00 | 2008.00 | 2008.00 | 2009.00 | 2010.00 | ▃▇▁▇▁ |
| month | 0 | 1.00 | 6.82 | 3.52 | 1.00 | 4.00 | 7.00 | 10.00 | 12.00 | ▆▅▃▆▇ |
| d18O | 8 | 0.99 | -6.16 | 2.58 | -16.33 | -7.78 | -5.86 | -4.28 | -0.11 | ▁▁▅▇▂ |
| agent.number | 0 | 1.00 | 22982.15 | 6548.22 | 7524.00 | 19840.00 | 21811.00 | 28760.00 | 31173.00 | ▁▂▅▁▇ |
dD
skimr::skim(ddata2)
| Name | ddata2 |
| Number of rows | 1466 |
| Number of columns | 8 |
| _______________________ | |
| Column type frequency: | |
| character | 2 |
| Date | 1 |
| numeric | 5 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| sample.id | 0 | 1 | 5 | 5 | 0 | 1465 | 0 |
| variable | 0 | 1 | 2 | 2 | 0 | 1 | 0 |
Variable type: Date
| skim_variable | n_missing | complete_rate | min | max | median | n_unique |
|---|---|---|---|---|---|---|
| Date | 0 | 1 | 2007-08-15 | 2010-06-15 | 2008-11-15 | 35 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| Site | 0 | 1 | 29.34 | 16.26 | 1.00 | 16.00 | 29.00 | 43.00 | 58.00 | ▆▇▇▆▇ |
| year | 0 | 1 | 2008.33 | 0.82 | 2007.00 | 2008.00 | 2008.00 | 2009.00 | 2010.00 | ▃▇▁▇▁ |
| month | 0 | 1 | 6.82 | 3.52 | 1.00 | 4.00 | 7.00 | 10.00 | 12.00 | ▆▅▃▆▇ |
| dD | 4 | 1 | -38.07 | 20.86 | -123.39 | -50.78 | -34.74 | -22.84 | 5.54 | ▁▁▃▇▃ |
| agent.number | 0 | 1 | 22982.15 | 6548.22 | 7524.00 | 19840.00 | 21811.00 | 28760.00 | 31173.00 | ▁▂▅▁▇ |
Site information (e.g. lat, long, elevation, district) is in the file “lookup_rainsite_VCSN.csv”. Or at least the closest VCSN.
site.info.orig <- readr::read_csv("data/lookup_rainsite_VCSN.csv")
site.info <- site.info.orig |>
dplyr::select(Site = site_numbe, area, lat, lon = long, height = ELEVATION)
slice_head(site.info, n = 5)
| Site | area | lat | lon | height |
|---|---|---|---|---|
| 1 | Far North District | -35.125 | 173.275 | 34.9724 |
| 2 | Kaipara District | -35.975 | 173.925 | 31.4201 |
| 3 | Whangarei District | -35.575 | 174.475 | 71.3756 |
| 4 | Far North District | -35.425 | 173.825 | 196.6477 |
| 5 | Far North District | -35.325 | 174.125 | 33.8324 |
Site location is shown by North Island and South Island.
iso_plot_site_locations(site.info, "Site", "lat", "lon", "NI")
iso_plot_site_locations(site.info, "Site", "lat", "lon", "SI")
iso_plot_raw(ddata, "Site", "Date", "d18O",
"Raw d18O by site (1-20)",
sites = paste(1:20)
)
iso_plot_raw(ddata, "Site", "Date", "d18O",
"Raw d18O by site (21-41)",
sites = paste(21:41)
)
iso_plot_raw(ddata, "Site", "Date", "d18O",
"Raw d18O by site (42-58)",
sites = paste(42:58)
)
iso_plot_raw(ddata2, "Site", "Date", "dD",
"Raw dD by site (1-20)",
sites = paste(1:20)
)
iso_plot_raw(ddata2, "Site", "Date", "dD",
"Raw dD by site (21-41)",
sites = paste(21:41)
)
iso_plot_raw(ddata2, "Site", "Date", "dD",
"Raw dD by site (42-58)",
sites = paste(42:58)
)
Sites with few data points are dropped, and NA measured values.
ddata <- ddata |>
filter(!(Site %in% c(8, 9, 12, 40, 45))) |>
drop_na(d18O)
Same for dD.
ddata2 <- ddata2 |>
filter(!(Site %in% c(8, 9, 12, 40, 45))) |>
drop_na(dD)
save(ddata, file = "ProcessedData/ddata.RData")
slice_head(ddata, n = 5)
| Site | sample.id | year | month | variable | d18O | agent.number | Date |
|---|---|---|---|---|---|---|---|
| 1 | 70801 | 2007 | 8 | d18O | -4.41 | 20661 | 2007-08-15 |
| 1 | 70901 | 2007 | 9 | d18O | -5.71 | 20661 | 2007-09-15 |
| 1 | 71001 | 2007 | 10 | d18O | -2.74 | 20661 | 2007-10-15 |
| 1 | 71101 | 2007 | 11 | d18O | -4.72 | 20661 | 2007-11-15 |
| 1 | 71201 | 2007 | 12 | d18O | -2.54 | 20661 | 2007-12-15 |
save(ddata2, file = "ProcessedData/ddata2.RData")
slice_head(ddata2, n = 5)
| Site | sample.id | year | month | variable | dD | agent.number | Date |
|---|---|---|---|---|---|---|---|
| 1 | 70801 | 2007 | 8 | dD | -24.44 | 20661 | 2007-08-15 |
| 1 | 70901 | 2007 | 9 | dD | -33.89 | 20661 | 2007-09-15 |
| 1 | 71001 | 2007 | 10 | dD | -11.46 | 20661 | 2007-10-15 |
| 1 | 71101 | 2007 | 11 | dD | -28.23 | 20661 | 2007-11-15 |
| 1 | 71201 | 2007 | 12 | dD | -16.98 | 20661 | 2007-12-15 |
An overview of the virtual climate station data is available here:
https://niwa.co.nz/climate/our-services/virtual-climate-stations
“Virtual Climate station Network (VCSN) data are estimates of daily rainfall, potential evapotranspiration, air and vapour pressure, maximum and minimum air temperature, soil temperature, relative humidity, solar radiation, wind speed and soil moisture on a regular (~5km) grid covering the whole of New Zealand. The estimates are produced every day, based on the spatial interpolation of actual data observations made at climate stations located around the country.”
One form in which the VCSN data are available is a series of yearly zipped up files in the directory.
“Q:/CLIMATE/vcsn_data/”
The CRS seems to be NZGD 1949
https://one.niwa.co.nz/display/CLIDB/VCSN+Grid+Points
As described here:
With EPSG number of 27258
Some function were made to explore and extract the VCSN data:
vcsn_info()vcsn_agent_locations()vcsn_append_nearest_vcsn()vcsn_append_climate_information()These functions are used below.
Set up the directory with the VCSN climate data. Sometimes before extracting data, the Q drive will need to be “woken up” by clicking on it in Windows Explorer.
# VCSN.directory <- "Q:/CLIMATE/vcsn_data/"
# vcsn_info(VCSN.directory)
Find all the vcsn climate station/agent locations. The output is an sf class object, and can be plotted.
# vcsn.agent.locations <- vcsn_agent_locations(VCSN.directory)
# save(vcsn.agent.locations, file = "ProcessedData/vcsn.agent.locations.RDdata")
load(file = "ProcessedData/vcsn.agent.locations.RDdata")
slice_head(vcsn.agent.locations, n = 5)
| Agent | vcsn.lon | vcsn.lat | geometry |
|---|---|---|---|
| 3027 | 166.825 | -45.325 | POINT (166.825 -45.325) |
| 3106 | 166.825 | -45.375 | POINT (166.825 -45.375) |
| 3380 | 166.825 | -45.425 | POINT (166.825 -45.425) |
| 4782 | 166.825 | -45.475 | POINT (166.825 -45.475) |
| 5878 | 166.825 | -45.525 | POINT (166.825 -45.525) |
plot(vcsn.agent.locations["Agent"], cex = 0.5, pch = 16)
Append nearest agent information and daily climate data for 2007–2022. This takes a while, so is done once and the results saved, then loaded.
# years <- 2023
# site.info.plus.climate.data <- vcsn_append_climate_information(site.info,VCSN.directory,
# years)
# save(site.info.plus.climate.data, file = "output/site.info.plus.climate.data.RData")
load(file = "ProcessedData/site.info.plus.climate.data.RData")
slice_head(site.info.plus.climate.data, n = 5)
| Site | area | lat | lon | height | VCSN.Agent | VCSN.lat | VCSN.lon | Date | MSLP | PET | Rain | RH | SoilM | ETmp | Rad | TMax | Tmin | VP | Wind | Rain_bc | Tmax_N | Tmin_N |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | Far North District | -35.125 | 173.275 | 34.9724 | 20661 | -35.125 | 173.275 | 2007-01-01 | 1015.4 | 4.7 | 0 | 76.2 | -113.3 | 16.4 | 28.3 | 20.1 | 8.4 | 12.6 | 3.9 | 0 | 19.9 | 8.2 |
| 2 | Kaipara District | -35.975 | 173.925 | 31.4201 | 21431 | -35.975 | 173.925 | 2007-01-01 | 1014.5 | 4.3 | 0 | 73.4 | -117.4 | 17.2 | 23.9 | 19.8 | 9.4 | 12.7 | 3.7 | 0 | 19.7 | 9.5 |
| 3 | Whangarei District | -35.575 | 174.475 | 71.3756 | 21815 | -35.575 | 174.475 | 2007-01-01 | 1014.6 | 4.2 | 0 | 71.0 | -118.3 | 17.4 | 26.9 | 21.2 | 10.4 | 12.6 | 3.6 | 0 | 21.5 | 10.4 |
| 4 | Far North District | -35.425 | 173.825 | 196.6477 | 29644 | -35.425 | 173.825 | 2007-01-01 | 1015.0 | 4.1 | 0 | 74.3 | -91.2 | 16.5 | 20.6 | 19.9 | 7.8 | 12.1 | 3.1 | 0 | 19.8 | 7.8 |
| 5 | Far North District | -35.325 | 174.125 | 33.8324 | 21536 | -35.325 | 174.125 | 2007-01-01 | 1015.0 | 4.1 | 0 | 72.2 | -112.2 | 17.8 | 26.2 | 22.0 | 9.7 | 12.4 | 2.8 | 0 | 22.4 | 9.6 |
The climate fields are identified as
https://one.niwa.co.nz/display/CLIDB/VCSN+Times+and+Field+Descriptions
https://one.niwa.co.nz/display/SYSTEMS/The+size+of+the+problem
Potential evaporation or potential evapotranspiration is defined as the amount of evaporation that would occur if a sufficient water source were available.
Rain_bc, Tmax_n, Tmin_N were introduced in 2019.
Convert to long format and make a summary via the mean value 2007–2022 by site.
site.info.plus.climate.data.long <- site.info.plus.climate.data |>
relocate(height, .after = "Date") |>
pivot_longer(height:Tmin_N, names_to = "Site quantity", values_to = "Value") |>
dplyr::select(-c("area", "lat", "lon"))
slice_head(site.info.plus.climate.data.long, n = 5)
| Site | VCSN.Agent | VCSN.lat | VCSN.lon | Date | Site quantity | Value |
|---|---|---|---|---|---|---|
| 1 | 20661 | -35.125 | 173.275 | 2007-01-01 | height | 34.9724 |
| 1 | 20661 | -35.125 | 173.275 | 2007-01-01 | MSLP | 1015.4000 |
| 1 | 20661 | -35.125 | 173.275 | 2007-01-01 | PET | 4.7000 |
| 1 | 20661 | -35.125 | 173.275 | 2007-01-01 | Rain | 0.0000 |
| 1 | 20661 | -35.125 | 173.275 | 2007-01-01 | RH | 76.2000 |
# mean values over 2007-2022 (inclusive)
site.climate.summary <- site.info.plus.climate.data.long |>
group_by(Site, VCSN.lat, VCSN.lon, `Site quantity`) |>
summarise(mean = mean(Value, na.rm = TRUE)) |>
pivot_wider(names_from = `Site quantity`, values_from = mean) |>
ungroup()
slice_head(site.climate.summary, n = 5)
| Site | VCSN.lat | VCSN.lon | ETmp | MSLP | PET | RH | Rad | Rain | Rain_bc | SoilM | TMax | Tmax_N | Tmin | Tmin_N | VP | Wind | height |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | -35.125 | 173.275 | 16.02943 | 1017.120 | 2.936140 | 84.48234 | 15.37604 | 3.968789 | 3.975873 | -46.10077 | 20.32240 | 20.36490 | 12.33893 | 12.33280 | 14.89418 | 4.105151 | 34.9724 |
| 2 | -35.975 | 173.925 | 16.16725 | 1016.914 | 2.860849 | 82.87960 | 14.88270 | 3.070722 | 2.976318 | -55.18568 | 19.90358 | 19.82599 | 11.80717 | 11.81980 | 14.66994 | 3.465092 | 31.4201 |
| 3 | -35.575 | 174.475 | 15.78417 | 1017.205 | 2.786037 | 82.68727 | 14.51177 | 3.812200 | 4.133676 | -46.91085 | 20.11838 | 20.06369 | 12.59993 | 12.46001 | 14.84321 | 3.164493 | 71.3756 |
| 4 | -35.425 | 173.825 | 15.24098 | 1016.628 | 2.695568 | 85.71193 | 14.53467 | 4.424469 | 4.088518 | -34.59187 | 19.25228 | 19.27521 | 11.21182 | 11.31184 | 14.58049 | 2.951643 | 196.6477 |
| 5 | -35.325 | 174.125 | 16.13272 | 1017.069 | 2.780869 | 83.63652 | 14.42936 | 3.949726 | 3.543600 | -41.56413 | 20.64189 | 20.59630 | 12.34612 | 12.00613 | 14.87118 | 2.330493 | 33.8324 |
rm(site.info.plus.climate.data.long)
Calculate mean annual range of monthly temperatures:
Take monthly average daily maximum temperature for each site and month of the time series
Pick the highest month for each year
Pick the lowest for each year
Subtract lowest from highest for each year to give annual range
Take the average range across all years == mean annual range of monthly temperatures
# Convert to date if not already
site.info.plus.climate.data$Date <- as.Date(site.info.plus.climate.data$Date)
# Get months
site.info.plus.climate.data$Month <- months(site.info.plus.climate.data$Date)
# Get years
site.info.plus.climate.data$Year <- format(site.info.plus.climate.data$Date,format="%y")
#Get mean annual range of maximum daily temperatures (averaged for each month) each VCSN.Agent, month and year combination
aggy <- aggregate(TMax ~ VCSN.Agent + Site + Month + Year, data = site.info.plus.climate.data, mean)
aggyMAX <- aggregate(TMax ~ VCSN.Agent + Site + Year, data = aggy, max)
aggyMIN <- aggregate(TMax ~ VCSN.Agent + Site + Year, data = aggy, min)
aggyRANGE <- aggyMIN
aggyRANGE$AnnualTempRange <- aggyMAX$TMax - aggyMIN$TMax
annualranges <- aggregate(AnnualTempRange ~ VCSN.Agent + Site, data = aggyRANGE, mean)
site.climate.summary<-dplyr::left_join(site.climate.summary, annualranges, by = "Site")
rm(site.info.plus.climate.data)
rm(aggy)
rm(aggyMAX)
rm(aggyMIN)
rm(aggyRANGE)
Now make the same VCSN summary file for 2007-2022, but across all VCSN points so that we can use it to extrapolate regression relationships across the country
slice_head(vcsn.agent.locations, n = 5)
| Agent | vcsn.lon | vcsn.lat | geometry |
|---|---|---|---|
| 3027 | 166.825 | -45.325 | POINT (166.825 -45.325) |
| 3106 | 166.825 | -45.375 | POINT (166.825 -45.375) |
| 3380 | 166.825 | -45.425 | POINT (166.825 -45.425) |
| 4782 | 166.825 | -45.475 | POINT (166.825 -45.475) |
| 5878 | 166.825 | -45.525 | POINT (166.825 -45.525) |
dim(vcsn.agent.locations)
[1] 11491 4
#get list of all VCSN agent numbers and elevations
elevations<-read.csv("data/VCS_elevations.csv")
keeps<-subset(elevations, select = c(AGENT_NO, ELEVATION))
names(keeps)<-c("Agent", "elevation")
elevations<-keeps
# # test run
# # vcsn.2007.2022 <- vcsn_combine_daily_data(VCSN.directory, years = 2007)
# # test run went fine but the line below took over a day and needs to be run on the modelling computer
# # vcsn.2007.2022 <- vcsn_combine_daily_data(VCSN.directory, years = 2007:2022)
# # save(vcsn.2007.2022, file = "output/vcsn.2007.2022.RData")
# load("output/vcsn.2007.2022.RData")
# # calculate annual ranges of monthly average temperatures
# # Convert to date if not already
# vcsn.2007.2022$Date <- as.Date(vcsn.2007.2022$Date)
# # Get months
# vcsn.2007.2022$Month <- months(vcsn.2007.2022$Date)
# # Get years
# vcsn.2007.2022$Year <- format(vcsn.2007.2022$Date,format="%y")
# # Get mean annual range of maximum daily temperatures (averaged for each month) for each VCSN.Agent, month and year combination
# aggy1 <- aggregate(TMax ~ Agent + Month + Year, data = vcsn.2007.2022, mean)
#
# aggyMAX <- aggregate(TMax ~ Agent + Year, data = aggy1, max)
# aggyMIN <- aggregate(TMax ~ Agent + Year, data = aggy1, min)
#
# aggyRANGE<-aggyMIN
# aggyRANGE$AnnualTempRange<-aggyMAX$TMax - aggyMIN$TMax
#
# annualranges<-aggregate(AnnualTempRange ~ Agent, data = aggyRANGE, mean)
# vcsn.mean <- vcsn.2007.2022 |>
# dplyr::select(-Tmin, -TMax,
# -Rain) |>
# rename(lat = Lat, lon = Longt) |>
# group_by(Agent, lat, lon) |>
# summarise(across(.cols = MSLP:Rain_bc, .fns = ~mean(.x, na.rm = TRUE)))
#
# national.climate.summary<-dplyr::left_join(vcsn.mean, annualranges, by = "Agent")
# #add elevations
# national.climate.summary<-dplyr::left_join(national.climate.summary, elevations, by = "Agent")
#
# names(vcsn.mean)
# vcsn.mean<-national.climate.summary[,1:14]
# names(vcsn.mean)<-c("Agent", "lat", "lon", "MSLP", "PET" , "RH" , "SoilM" , "ETmp"
# ,"Rad" , "VP" , "Wind" , "Rain_bc" , "AnnualTempRange", "height")
# removing large R objects
# rm(vcsn.2007.2022)
# rm(aggy1)
# rm(aggyMAX)
# rm(aggyMIN)
# rm(aggyRANGE)
# save(national.climate.summary, file = "ProcessedData/national.climate.summary.RData")
# save(vcsn.mean, file = "ProcessedData/vcsn.mean.RData")
load("ProcessedData/national.climate.summary.RData")
load("ProcessedData/vcsn.mean.RData")
Introduce an “island” column with values South Island/North Island.
site.climate.summary <- site.climate.summary |>
mutate(island = if_else(VCSN.lon <= 174.49 & VCSN.lat <= -40.18,
"South Island",
"North Island")) |>
relocate(island, .after = Site)
Change some names and add the same “island” column for the national VCSN dataframe for mapping later
names(national.climate.summary)<-c("VCSN.Agent", "VCSN.lat", "VCSN.lon", "MSLP", "PET", "RH", "SoilM", "ETmp",
"Rad", "VP", "Wind", "Rain_bc", "AnnualTempRange", "height")
national.climate.summary <- national.climate.summary |>
mutate(island = if_else(VCSN.lon <= 174.49 & VCSN.lat <= -40.18,
"South Island",
"North Island")) |>
relocate(island, .after = height)
Make climate summary by site data frame into an sf object. Make climate summary by site data frame into an sf object.
site.climate.summary.sf <- site.climate.summary |>
sf::st_as_sf(coords = c("VCSN.lon", "VCSN.lat"), crs = 4326)
Make national climate summary into an sf object…and take a look at the annual temperature range nationally
vcsn.agent.locations1 <- vcsn.agent.locations
names(vcsn.agent.locations1) <- c("VCSN.Agent", "vcsn.lon", "vcsn.lat", "geometry")
national.climate.summary.sf <- dplyr::full_join(national.climate.summary,
vcsn.agent.locations1,
by = "VCSN.Agent")
national.climate.summary.sf <- st_as_sf(national.climate.summary.sf)
plot(national.climate.summary.sf["AnnualTempRange"], cex = 0.5, pch = 16)
Plot showing mean rainfall and soil moisture at sites. Both have potential to be proxies for orographic effects due to mountain ranges.
Rain_bc the bias corrected version of Rain is used.
plot(site.climate.summary.sf["Rain_bc"], cex = 0.5, pch = 16)
nzmap <- rnaturalearth::ne_countries(scale = 'large',
country = "New Zealand",
returnclass = 'sf')
ggplot() +
geom_sf(data = nzmap, fill = "tan1") +
geom_sf(data = site.climate.summary.sf,
mapping = aes(size = Rain_bc, colour = Rain)) +
coord_sf(xlim = c(164.8, 179.4), ylim = c(-47.7, -33.8)) +
scale_colour_viridis_c() +
xlab("Longitude") +
ylab("Latitude") +
ggtitle("Mean daily rainfall (2007-2022)")
ggplot() +
geom_sf(data = nzmap, fill = "tan1") +
geom_sf(data = site.climate.summary.sf,
mapping = aes(size = SoilM, colour = SoilM)) +
coord_sf(xlim = c(164.8, 179.4), ylim = c(-47.7, -33.8)) +
scale_colour_viridis_c() +
xlab("Longitude") +
ylab("Latitude") +
ggtitle("Mean soil moisture (2007-2022)")
ggplot() +
geom_sf(data = nzmap, fill = "tan1") +
geom_sf(data = site.climate.summary.sf,
mapping = aes(size = -SoilM, colour = -SoilM)) +
coord_sf(xlim = c(164.8, 179.4), ylim = c(-47.7, -33.8)) +
scale_colour_viridis_c() +
xlab("Longitude") +
ylab("Latitude") +
ggtitle("Negative mean soil moisture (2007-2022)")
The soil moisture plateaus as rainfall increases.
ggplot(site.climate.summary.sf, aes(x = Rain_bc, y = SoilM)) +
geom_point(colour = "blue") +
xlab("Mean daily rainfall") +
ylab("Mean soil moisture") +
ggtitle("Sites: soil moisture vs rainfall (mean 2007-09)")
There is high correlation between many VCSN variables, and for the purposes of latter regression/correlation analyses with estimated sinusoidal parameters, some VCSN variables can be dropped.
Concentrating on “temperature” variables, Tmin and TMax are highly correlated with the variables Tmin_N, and Tmax_n, which in turn are highly correlated with ETmp (earth temperature at 10cm depth).
So ETmp is retained but the other four are dropped (Tmin, TMax, Tmin_N, Tmax_N).
We will also retain the AnnualTempRange metric
site.T <- site.climate.summary |>
dplyr::select(ETmp, Rad, TMax, Tmin, Tmax_N, Tmin_N)
GGally::ggpairs(site.T)
corrplot::corrplot(cor(site.T), diag = FALSE, order = "hclust" )
Concentrating on “non-temperature” variables. Rain is dropped and the bias-corrected version Rain_bc is retained.
site.non.T <- site.climate.summary |>
dplyr::select(MSLP, PET, Rain, SoilM, VP, Wind, Rain_bc, height, AnnualTempRange)
GGally::ggpairs(site.non.T)
corrplot::corrplot(cor(site.non.T), diag = FALSE, order = "hclust" )
site.remain <- site.climate.summary |>
dplyr::select(-Site, -island,
-TMax, -Tmin, -Tmax_N, -Tmin_N,
-Rain)
GGally::ggpairs(site.remain)
corrplot::corrplot(cor(site.remain), diag = FALSE, order = "hclust" )
These data are need for the next block of code (01-isoscape-model-fits-regressions.Rmd).
save(ddata, file = "Output/Data/ddata.RData")
dim(ddata)
[1] 1436 8
names(ddata)
[1] "Site" "sample.id" "year" "month" "variable"
[6] "d18O" "agent.number" "Date"
Mean value of climate variable at a site, for the closest agent. Taking the mean value over all data for 2007 to 2022.
save(site.climate.summary, file = "Output/Data/site.climate.summary.RData")
dim(site.climate.summary)
[1] 55 21
names(site.climate.summary)
[1] "Site" "island" "VCSN.lat" "VCSN.lon"
[5] "ETmp" "MSLP" "PET" "RH"
[9] "Rad" "Rain" "Rain_bc" "SoilM"
[13] "TMax" "Tmax_N" "Tmin" "Tmin_N"
[17] "VP" "Wind" "height" "VCSN.Agent"
[21] "AnnualTempRange"
save(vcsn.agent.locations, file = "Output/Data/vcsn.agent.locations.RData")
dim(vcsn.agent.locations)
[1] 11491 4
names(vcsn.agent.locations)
[1] "Agent" "vcsn.lon" "vcsn.lat" "geometry"
Same things, but slightly different column names.
save(vcsn.agent.locations1, file = "Output/Data/vcsn.agent.locations1.RData")
dim(vcsn.agent.locations1)
[1] 11491 4
names(vcsn.agent.locations1)
[1] "VCSN.Agent" "vcsn.lon" "vcsn.lat" "geometry"
Mean value of climate data at an agent location. Taking the mean over all data from 2007 to 2022. Also includes height at the agent location.
save(vcsn.mean, file = "Output/Data/vcsn.mean.RData")
dim(vcsn.mean)
[1] 11491 14
names(vcsn.mean)
[1] "Agent" "lat" "lon" "MSLP"
[5] "PET" "RH" "SoilM" "ETmp"
[9] "Rad" "VP" "Wind" "Rain_bc"
[13] "AnnualTempRange" "height"
A slightly different version:
save(national.climate.summary, file = "Output/Data/national.climate.summary.RData")
dim(national.climate.summary)
[1] 11491 15
names(national.climate.summary)
[1] "VCSN.Agent" "VCSN.lat" "VCSN.lon" "MSLP"
[5] "PET" "RH" "SoilM" "ETmp"
[9] "Rad" "VP" "Wind" "Rain_bc"
[13] "AnnualTempRange" "height" "island"